{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": "true"
   },
   "source": [
    "# Table of Contents\n",
    " <p><div class=\"lev1\"><a href=\"#Supervised-Topic-Model\"><span class=\"toc-item-num\">1 - </span>Supervised Topic Model</a></div><div class=\"lev2\"><a href=\"#Read-and-tokenize-moview-review-dataset\"><span class=\"toc-item-num\">1.1 - </span>Read and tokenize moview review dataset</a></div><div class=\"lev2\"><a href=\"#Infer-topics-with-SupervisedLDA\"><span class=\"toc-item-num\">1.2 - </span>Infer topics with SupervisedLDA</a></div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Supervised Topic Model  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook shows an example of supervised topic model with the dataset provided by [Bo Pang](https://www.cs.cornell.edu/people/pabo/movie-review-data/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import logging\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from ptm import GibbsSupervisedLDA\n",
    "from ptm.nltk_corpus import get_ids_cnt\n",
    "from ptm.utils import convert_cnt_to_list, get_top_words\n",
    "\n",
    "%matplotlib inline  \n",
    "\n",
    "logger = logging.getLogger('GibbsSupervisedLDA')\n",
    "logger.propagate = False"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read and tokenize moview review dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "datafolder = '../data/scaledata/Dennis+Schwartz/'\n",
    "rating_file = os.path.join(datafolder, 'rating.Dennis+Schwartz')\n",
    "review_file = os.path.join(datafolder, 'subj.Dennis+Schwartz')\n",
    "with open(rating_file, 'r') as f:\n",
    "    ratings = np.array([float(line.strip()) for line in f.readlines()])\n",
    "with open(review_file, 'r') as f:\n",
    "    reviews = [line for line in f.readlines()]\n",
    "\n",
    "voca, word_ids, word_cnt = get_ids_cnt(reviews)\n",
    "corpus = convert_cnt_to_list(word_ids, word_cnt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "num doc 1027 num_voca 10526\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEACAYAAACwB81wAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEfZJREFUeJzt3XmsXGd9xvHvAyYRW01Y4tvGkFBCIUHQkBKTCiQGaENS\ntXHEH2kAVSylRaIsKqBio1Z2VwgSUamqVCpbDYKmKbRJ2E0II5qSBchCwG5qaG2Ciy8QlhJFIIf8\n+scch+nlJnfuMj5jv9+PNMqZc8/yeG7muee+c849qSokSUe/+/UdQJJ0eFj4ktQIC1+SGmHhS1Ij\nLHxJaoSFL0mNWLLwkxyb5LokNyb5SpK/6uYfl2RnkluTfDLJ+rF1tibZk2R3krOm+Q+QJE0mk5yH\nn+RBVXVnkvsD/w68HjgXuL2q3prkjcBxVbUlyanA+4EzgI3AlcDjyxP+JalXEw3pVNWd3eSx3Trf\nAzYDO7r5O4DzuulzgUuq6q6q2gvsATatVWBJ0spMVPhJ7pfkRuAAMKyqXcCGqpoHqKoDwPHd4icA\nt42tvr+bJ0nq0bpJFqqqu4GnJvk54JNJBsDCIRqHbCRphk1U+IdU1f8m+RjwNGA+yYaqmk8yB3yr\nW2w/8Oix1TZ28/6fJP6AkKQVqKqsZL1JztJ55KEzcJI8EPh14EbgCuAl3WIvBi7vpq8ALkhyTJLH\nAicD199L6Jl7bNu2rfcMZjJTi7nMNNljNSY5wv95YEeSMPoB8b6q+nQ3pn9pkpcB+4DzuxLfleRS\nYBdwEHhlrTalJGnVliz8qroFOH2R+d8Ffu1e1nkz8OZVp5MkrRmvtF1gMBj0HeFnmGkyZprcLOYy\n0/RNdOHVVHacONIjScuUhJrWh7aSpKODhS9JjbDwJakRFr4kNcLCl6RGWPiS1AgLX5IaYeFLUiMs\nfElqhIUvSY2w8CWpERa+JDXCwpekRlj4ktQIC1+SGmHhS1IjLHxJaoSFL0mNsPAlqREWviQ1wsKX\npEZY+JLUCAtfkhph4UtSIyx8SWqEhS9JjViy8JNsTHJVkq8kuSXJq7v525J8I8kN3ePssXW2JtmT\nZHeSs6b5D5AkTSZVdd8LJHPAXFXdlOQhwBeBzcBvAz+sqosWLH8K8AHgDGAjcCXw+FqwoyQLZ0mS\nlpCEqspK1l3yCL+qDlTVTd30HcBu4IRD+15klc3AJVV1V1XtBfYAm1YSTpK0dpY1hp/kJOA04Lpu\n1quS3JTknUnWd/NOAG4bW20/P/0BIR1R5uZOIkmvj7m5k/p+GXSUmLjwu+GcDwKv7Y70LwZ+sapO\nAw4Ab5tORKk/8/P7gOr1Mcogrd66SRZKso5R2b+vqi4HqKpvjy3yDuDD3fR+4NFjX9vYzfsZ27dv\nv2d6MBgwGAwmjC1JbRgOhwyHwzXZ1pIf2gIkeS/wnap63di8uao60E3/IXBGVb0wyanA+4GnMxrK\n+RR+aKsjVBJGR9q9psD3ig5ZzYe2Sx7hJ3kG8CLgliQ3Mvq//03AC5OcBtwN7AVeAVBVu5JcCuwC\nDgKvtNklqX8THeFPZcce4esI4BG+Zs1UT8uUJB0dLHxJaoSFL0mNsPAlqREWviQ1wsKXpEZY+JLU\nCAtfkhph4UtSIyx8SWqEhS9JjbDwJakRFr4kNcLCl6RGWPiS1AgLX5IaYeFLUiMsfElqhIUvSY2w\n8CWpERa+JDXCwpekRlj4ktQIC1+SGmHhS1IjLHxJaoSFL0mNsPAlqREWviQ1YsnCT7IxyVVJvpLk\nliSv6eYfl2RnkluTfDLJ+rF1tibZk2R3krOm+Q+QJE0mVXXfCyRzwFxV3ZTkIcAXgc3AS4Hbq+qt\nSd4IHFdVW5KcCrwfOAPYCFwJPL4W7CjJwlnSzEkC9P3/afC9okOSUFVZybpLHuFX1YGquqmbvgPY\nzajINwM7usV2AOd10+cCl1TVXVW1F9gDbFpJOEnS2lnWGH6Sk4DTgGuBDVU1D6MfCsDx3WInALeN\nrba/mydJ6tG6SRfshnM+CLy2qu5IsvB3zGX/zrl9+/Z7pgeDAYPBYLmbkKSj2nA4ZDgcrsm2lhzD\nB0iyDvgI8PGqens3bzcwqKr5bpz/M1V1SpItQFXVhd1ynwC2VdV1C7bpGL5mnmP4mjVTHcPvvBvY\ndajsO1cAL+mmXwxcPjb/giTHJHkscDJw/UrCSZLWziRn6TwD+CxwC6NDnQLexKjELwUeDewDzq+q\n73frbAV+FzjIaAho5yLb9QhfM88jfM2a1RzhTzSkMw0Wvo4EFr5mzeEY0pEkHeEsfElqhIUvSY2w\n8CWpERNfeCUdbnNzJzE/v6/vGNJRw7N0NLNm5QyZWcjge0WHeJaOJGlJFr4kNcLCl6RGWPiS1AgL\nX5IaYeFLUiMsfElqhIUvSY3wSltp5h3bXYTWnw0bTuTAgb29ZtDqeaWtZpZX2s5WBt+vs8ErbSVJ\nS7LwJakRFr4kNcLCl6RGWPiS1AgLX5IaYeFLUiMsfElqhIUvSY2w8CWpERa+JDXCwpekRixZ+Ene\nlWQ+yZfG5m1L8o0kN3SPs8e+tjXJniS7k5w1reCSpOWZ5Aj/PcDzFpl/UVWd3j0+AZDkFOB84BTg\nHODi9P13XSVJwASFX1VXA99b5EuLFflm4JKququq9gJ7gE2rSihJWhOrGcN/VZKbkrwzyfpu3gnA\nbWPL7O/mSZJ6ttI7Xl0M/FlVVZK/AN4GvHy5G9m+ffs904PBgMFgsMI4knR0Gg6HDIfDNdnWRHe8\nSnIi8OGqesp9fS3JFqCq6sLua58AtlXVdYus5x2vdJ+849VsZfD9OhsOxx2vwtiYfZK5sa89H/hy\nN30FcEGSY5I8FjgZuH4lwSRJa2vJIZ0kHwAGwCOSfB3YBjw7yWnA3cBe4BUAVbUryaXALuAg8EoP\n4yVpNngTc80sh3RmK4Pv19ngTcwlSUuy8CWpERa+JDXCwpekRlj4ktSIlV5pq6Pc3NxJzM/v6zuG\npDXkaZlalKdEmmFhBt+vs8HTMiVJS7LwJakRFr4kNcLCl6RGWPiS1AhPy5Q0gWPp+/bUGzacyIED\ne3vNcKTztEwtytMyzTCLGewMT8uUJE3AwpekRlj4ktQIC1+SGmHhS1IjLHxJaoSFL0mNsPAlqREW\nviQ1wsKXpEZY+JLUCAtfkhph4UtSIyx8SWrEkoWf5F1J5pN8aWzecUl2Jrk1ySeTrB/72tYke5Ls\nTnLWtIJLkpZnkiP89wDPWzBvC3BlVT0BuArYCpDkVOB84BTgHODi9H3XBEkSMEHhV9XVwPcWzN4M\n7OimdwDnddPnApdU1V1VtRfYA2xam6iSpNVY6Rj+8VU1D1BVB4Dju/knALeNLbe/mydJ6tla3dN2\nRfcd2759+z3Tg8GAwWCwRnEk6egwHA4ZDodrsq2J7mmb5ETgw1X1lO75bmBQVfNJ5oDPVNUpSbYA\nVVUXdst9AthWVdctsk3vaTvDvKetGWYxg51xeO5pm+5xyBXAS7rpFwOXj82/IMkxSR4LnAxcv5Jg\nkqS1teSQTpIPAAPgEUm+DmwD3gL8c5KXAfsYnZlDVe1KcimwCzgIvNLDeEmaDRMN6Uxlxw7pzDSH\ndMwwixnsjMMzpCNJOsJZ+JLUCAtfkhph4UtSI9bqwitJmrJj6ftPc23YcCIHDuztNcNqeJaOFuVZ\nOmYww+IZ+u4tz9KRJC3JwpekRlj4ktQIC1+SGmHhS1IjLHxJaoSFL0mNsPAlqREWviQ1wsKXpEZY\n+JLUCAtfkhph4UtSIyx8SWqEhS9JjbDwJakRFr4kNcLCl6RGWPiS1AgLX5IaYeFLUiMsfElqxLrV\nrJxkL/AD4G7gYFVtSnIc8E/AicBe4Pyq+sEqc0qSVmm1R/h3A4OqempVbermbQGurKonAFcBW1e5\nD0nSGlht4WeRbWwGdnTTO4DzVrkPSdIaWG3hF/CpJJ9P8vJu3oaqmgeoqgPA8avchyRpDaxqDB94\nRlV9M8mjgJ1JbmX0Q2DcwueSpB6sqvCr6pvdf7+d5DJgEzCfZENVzSeZA751b+tv3779nunBYMBg\nMFhNHEk66gyHQ4bD4ZpsK1UrOwBP8iDgflV1R5IHAzuBPwWeC3y3qi5M8kbguKrassj6tdJ9a/qS\n0P8vZ2Yww+xl6Lu3klBVWcm6qznC3wD8a5LqtvP+qtqZ5AvApUleBuwDzl/FPiRJa2TFR/ir3rFH\n+DPNI3wzmGHxDH331mqO8L3SVpIaYeFLUiMsfElqhIUvSY2w8CWpERa+JDXCwpekRlj4ktSI1f7x\nNE3B3NxJzM/v6zuGpKOMV9rOIK9yNYMZZjdD373llbaSpCVZ+JLUCAtfkhph4UtSIzxLR5Imdmx3\nUsWRycKXpIn9mFk4U2ilHNKRpEZY+JLUCAtfkhph4UtSIyx8SWqEhS9JjbDwJakRFr4kNcLCl6RG\nWPiS1AgLX5IaYeFLUiOmVvhJzk7yH0n+M8kbp7UfSdJkplL4Se4H/C3wPOBJwAuSPHEa+1prw+Gw\n7wiLGPYdYBHDvgMsYth3gEUM+w5wL4Z9B1jEsO8Aixj2HWBNTesIfxOwp6r2VdVB4BJg85T2taYs\n/EkN+w6wiGHfARYx7DvAvRj2HWARw74DLGLYd4A1Na2/h38CcNvY828w+iEw0y677DKuueYaLrro\not4yPOpRj+pt35KObt4AZcwFF7yIH//4Tnbu3Nl3FElac6la+7u3JDkT2F5VZ3fPtwBVVReOLdP3\nbWMk6YhUVSu67dW0Cv/+wK3Ac4FvAtcDL6iq3Wu+M0nSRKYypFNVP0nyKmAnow+G32XZS1K/pnKE\nL0maPVO/0naSC7CS/E2SPUluSnJa35mSPCHJ55L8KMnrpp1nwkwvTHJz97g6yZNnJNe5XaYbk3wh\nyXP6zjS23BlJDiZ5ft+ZkjwryfeT3NA9/rjvTN0yg+579+Ukn+k7U5I3dHluSHJLkruSPGwGcj0i\nyce7jrolyUtmINPDkvxL9/67NsmpS260qqb2YPQD5avAicADgJuAJy5Y5hzgo93004FrZyDTI4Ff\nAf4ceN008ywj05nA+m767Gm/TsvI9aCx6ScDX+0709hynwY+Ajy/70zAs4Arpv09W2am9cBXgBO6\n54/sO9OC5X8TuHJGXqttwJsPvU7A7cC6njO9FfiTbvoJk7xW0z7Cn+QCrM3AewGq6jpgfZINfWaq\nqu9U1ReBu6aYY7mZrq2qH3RPr2V0rcMs5Lpz7OlDgO/0nanzauCDwLemnGc5mVZ0ZsUUM70Q+FBV\n7YfR//czkGncC4B/nHKmSXMdAB7aTT8UuL2qptkPk2Q6FbgKoKpuBU5Kcp8X8ky78Be7AGthUS1c\nZv8iyxzuTIfbcjO9HPj4VBONTJQryXlJdgMfA17Td6YkvwCcV1V/x+Ep2Um/f7/aDQl8dKJfv6ef\n6ZeAhyf5TJLPJ/mdGcgEQJIHMvpN9kNTzjRprncAT0ryP8DNwGtnINPNwPMBkmwCHgNsvK+NeuHV\nESbJs4GXAs/sO8shVXUZcFmSZwLvY/TrZZ/+Ghgf8zycR9b35ovAY6rqziTnAJcxKtw+rQNOB54D\nPBi4Jsk1VfXVfmMB8FvA1VX1/b6DdLYCN1fVs5M8DvhUkqdU1R09ZnoL8PYkNwC3ADcCP7mvFaZd\n+PsZ/dQ5ZGM3b+Eyj15imcOd6XCbKFOSpwB/D5xdVd+blVyHVNXVSdYleURV3d5jpqcBlyQJo/HW\nc5IcrKor+so0XgxV9fEkFyd5eFV9t69MjI4av1NVPwJ+lOSzwC8zGjvuK9MhF3B4hnNgslzPAP4S\noKq+luS/gScCX+grU1X9EHjZoeddpv+6z61O+cOQ+/PTDx6OYfTBwykLlvkNfvqh7ZlM/0PbJTON\nLbsNeP008yzjdXoMsAc4c9p5lpnrcWPTpwNf6zvTguXfw/Q/tJ3kddowNr0J2DsDmZ4IfKpb9kGM\njhJP7ft7x+jD5NuBB07zNVrma/U2YNuh7yWj4ZaH95xpPfCAbvr3gH9YcruH4cU8m9FVt3uALd28\nVwC/P7bM33b/uJuB0/vONPYN/T7wXeDrwEN6zvSO7k1wA6Nf3a6f9us0Ya4/Ar7c5fo34Gl9Z1qw\n7LuZcuFP+Dr9Qfc63Qh8Dnh635m6529gdKbOl4BXz0imFwMfmHaWZX7/Hgl8uOuoLzH6ywF9Zzqz\n+/puRicorF9qm154JUmN8BaHktQIC1+SGmHhS1IjLHxJaoSFL0mNsPAlqREWviQ1wsKXpEb8H8B6\n+LYT4HUJAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10e953048>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "max rating 0.9 \tmin rating 0.1\n"
     ]
    }
   ],
   "source": [
    "n_doc = len(corpus)\n",
    "n_voca = voca.size\n",
    "print('num doc', n_doc, 'num_voca', n_voca)\n",
    "plt.hist(ratings, bins=9)\n",
    "plt.show()\n",
    "print('max rating', np.max(ratings), '\\tmin rating', np.min(ratings))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Infer topics with SupervisedLDA"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2016-02-10 19:43:04 INFO:GibbsSupervisedLDA:[ITER] 0,\tMAE:0.07,\tlog_likelihood:-1200104.10\n",
      "2016-02-10 19:43:13 INFO:GibbsSupervisedLDA:[ITER] 1,\tMAE:0.07,\tlog_likelihood:-1115555.47\n",
      "2016-02-10 19:43:22 INFO:GibbsSupervisedLDA:[ITER] 2,\tMAE:0.07,\tlog_likelihood:-1073563.29\n",
      "2016-02-10 19:43:32 INFO:GibbsSupervisedLDA:[ITER] 3,\tMAE:0.07,\tlog_likelihood:-1048468.36\n",
      "2016-02-10 19:43:41 INFO:GibbsSupervisedLDA:[ITER] 4,\tMAE:0.07,\tlog_likelihood:-1032117.31\n",
      "2016-02-10 19:43:49 INFO:GibbsSupervisedLDA:[ITER] 5,\tMAE:0.08,\tlog_likelihood:-1020609.06\n",
      "2016-02-10 19:43:58 INFO:GibbsSupervisedLDA:[ITER] 6,\tMAE:0.08,\tlog_likelihood:-1012588.62\n",
      "2016-02-10 19:44:07 INFO:GibbsSupervisedLDA:[ITER] 7,\tMAE:0.08,\tlog_likelihood:-1006078.19\n",
      "2016-02-10 19:44:17 INFO:GibbsSupervisedLDA:[ITER] 8,\tMAE:0.08,\tlog_likelihood:-1000468.34\n",
      "2016-02-10 19:44:26 INFO:GibbsSupervisedLDA:[ITER] 9,\tMAE:0.08,\tlog_likelihood:-996064.31\n",
      "2016-02-10 19:44:35 INFO:GibbsSupervisedLDA:[ITER] 10,\tMAE:0.08,\tlog_likelihood:-991448.64\n",
      "2016-02-10 19:44:44 INFO:GibbsSupervisedLDA:[ITER] 11,\tMAE:0.09,\tlog_likelihood:-989309.68\n",
      "2016-02-10 19:44:53 INFO:GibbsSupervisedLDA:[ITER] 12,\tMAE:0.08,\tlog_likelihood:-986026.11\n",
      "2016-02-10 19:45:02 INFO:GibbsSupervisedLDA:[ITER] 13,\tMAE:0.08,\tlog_likelihood:-983278.74\n",
      "2016-02-10 19:45:12 INFO:GibbsSupervisedLDA:[ITER] 14,\tMAE:0.08,\tlog_likelihood:-980648.03\n",
      "2016-02-10 19:45:20 INFO:GibbsSupervisedLDA:[ITER] 15,\tMAE:0.09,\tlog_likelihood:-978593.27\n",
      "2016-02-10 19:45:29 INFO:GibbsSupervisedLDA:[ITER] 16,\tMAE:0.09,\tlog_likelihood:-977028.47\n",
      "2016-02-10 19:45:38 INFO:GibbsSupervisedLDA:[ITER] 17,\tMAE:0.09,\tlog_likelihood:-975142.51\n",
      "2016-02-10 19:45:47 INFO:GibbsSupervisedLDA:[ITER] 18,\tMAE:0.09,\tlog_likelihood:-974758.06\n",
      "2016-02-10 19:45:57 INFO:GibbsSupervisedLDA:[ITER] 19,\tMAE:0.08,\tlog_likelihood:-972879.10\n",
      "2016-02-10 19:46:05 INFO:GibbsSupervisedLDA:[ITER] 20,\tMAE:0.09,\tlog_likelihood:-971601.43\n",
      "2016-02-10 19:46:15 INFO:GibbsSupervisedLDA:[ITER] 21,\tMAE:0.09,\tlog_likelihood:-970929.30\n",
      "2016-02-10 19:46:24 INFO:GibbsSupervisedLDA:[ITER] 22,\tMAE:0.08,\tlog_likelihood:-969826.58\n",
      "2016-02-10 19:46:34 INFO:GibbsSupervisedLDA:[ITER] 23,\tMAE:0.08,\tlog_likelihood:-968402.00\n",
      "2016-02-10 19:46:43 INFO:GibbsSupervisedLDA:[ITER] 24,\tMAE:0.08,\tlog_likelihood:-968001.73\n",
      "2016-02-10 19:46:52 INFO:GibbsSupervisedLDA:[ITER] 25,\tMAE:0.08,\tlog_likelihood:-967423.17\n",
      "2016-02-10 19:47:02 INFO:GibbsSupervisedLDA:[ITER] 26,\tMAE:0.08,\tlog_likelihood:-966159.14\n",
      "2016-02-10 19:47:12 INFO:GibbsSupervisedLDA:[ITER] 27,\tMAE:0.08,\tlog_likelihood:-965307.04\n",
      "2016-02-10 19:47:21 INFO:GibbsSupervisedLDA:[ITER] 28,\tMAE:0.08,\tlog_likelihood:-964563.86\n",
      "2016-02-10 19:47:31 INFO:GibbsSupervisedLDA:[ITER] 29,\tMAE:0.08,\tlog_likelihood:-963570.42\n",
      "2016-02-10 19:47:42 INFO:GibbsSupervisedLDA:[ITER] 30,\tMAE:0.08,\tlog_likelihood:-963055.59\n",
      "2016-02-10 19:47:52 INFO:GibbsSupervisedLDA:[ITER] 31,\tMAE:0.08,\tlog_likelihood:-962735.30\n",
      "2016-02-10 19:48:02 INFO:GibbsSupervisedLDA:[ITER] 32,\tMAE:0.08,\tlog_likelihood:-961139.44\n",
      "2016-02-10 19:48:12 INFO:GibbsSupervisedLDA:[ITER] 33,\tMAE:0.08,\tlog_likelihood:-960502.03\n",
      "2016-02-10 19:48:22 INFO:GibbsSupervisedLDA:[ITER] 34,\tMAE:0.08,\tlog_likelihood:-959892.16\n",
      "2016-02-10 19:48:32 INFO:GibbsSupervisedLDA:[ITER] 35,\tMAE:0.08,\tlog_likelihood:-959321.29\n",
      "2016-02-10 19:48:42 INFO:GibbsSupervisedLDA:[ITER] 36,\tMAE:0.08,\tlog_likelihood:-959294.60\n",
      "2016-02-10 19:48:51 INFO:GibbsSupervisedLDA:[ITER] 37,\tMAE:0.08,\tlog_likelihood:-958416.57\n",
      "2016-02-10 19:49:00 INFO:GibbsSupervisedLDA:[ITER] 38,\tMAE:0.08,\tlog_likelihood:-958708.81\n",
      "2016-02-10 19:49:09 INFO:GibbsSupervisedLDA:[ITER] 39,\tMAE:0.08,\tlog_likelihood:-958331.49\n",
      "2016-02-10 19:49:18 INFO:GibbsSupervisedLDA:[ITER] 40,\tMAE:0.08,\tlog_likelihood:-957697.51\n",
      "2016-02-10 19:49:28 INFO:GibbsSupervisedLDA:[ITER] 41,\tMAE:0.08,\tlog_likelihood:-956916.19\n",
      "2016-02-10 19:49:37 INFO:GibbsSupervisedLDA:[ITER] 42,\tMAE:0.08,\tlog_likelihood:-955973.23\n",
      "2016-02-10 19:49:46 INFO:GibbsSupervisedLDA:[ITER] 43,\tMAE:0.08,\tlog_likelihood:-955332.56\n",
      "2016-02-10 19:49:54 INFO:GibbsSupervisedLDA:[ITER] 44,\tMAE:0.08,\tlog_likelihood:-955296.00\n",
      "2016-02-10 19:50:03 INFO:GibbsSupervisedLDA:[ITER] 45,\tMAE:0.08,\tlog_likelihood:-955303.42\n",
      "2016-02-10 19:50:12 INFO:GibbsSupervisedLDA:[ITER] 46,\tMAE:0.08,\tlog_likelihood:-954540.14\n",
      "2016-02-10 19:50:22 INFO:GibbsSupervisedLDA:[ITER] 47,\tMAE:0.08,\tlog_likelihood:-954232.94\n",
      "2016-02-10 19:50:32 INFO:GibbsSupervisedLDA:[ITER] 48,\tMAE:0.08,\tlog_likelihood:-952474.63\n",
      "2016-02-10 19:50:41 INFO:GibbsSupervisedLDA:[ITER] 49,\tMAE:0.08,\tlog_likelihood:-952360.92\n",
      "2016-02-10 19:50:51 INFO:GibbsSupervisedLDA:[ITER] 50,\tMAE:0.08,\tlog_likelihood:-953415.19\n",
      "2016-02-10 19:51:00 INFO:GibbsSupervisedLDA:[ITER] 51,\tMAE:0.08,\tlog_likelihood:-952345.91\n",
      "2016-02-10 19:51:10 INFO:GibbsSupervisedLDA:[ITER] 52,\tMAE:0.08,\tlog_likelihood:-952259.97\n",
      "2016-02-10 19:51:20 INFO:GibbsSupervisedLDA:[ITER] 53,\tMAE:0.08,\tlog_likelihood:-952232.09\n",
      "2016-02-10 19:51:30 INFO:GibbsSupervisedLDA:[ITER] 54,\tMAE:0.08,\tlog_likelihood:-952488.02\n",
      "2016-02-10 19:51:40 INFO:GibbsSupervisedLDA:[ITER] 55,\tMAE:0.08,\tlog_likelihood:-951400.56\n",
      "2016-02-10 19:51:49 INFO:GibbsSupervisedLDA:[ITER] 56,\tMAE:0.08,\tlog_likelihood:-951612.91\n",
      "2016-02-10 19:51:59 INFO:GibbsSupervisedLDA:[ITER] 57,\tMAE:0.08,\tlog_likelihood:-951843.57\n",
      "2016-02-10 19:52:08 INFO:GibbsSupervisedLDA:[ITER] 58,\tMAE:0.08,\tlog_likelihood:-951312.42\n",
      "2016-02-10 19:52:18 INFO:GibbsSupervisedLDA:[ITER] 59,\tMAE:0.08,\tlog_likelihood:-951363.82\n",
      "2016-02-10 19:52:28 INFO:GibbsSupervisedLDA:[ITER] 60,\tMAE:0.08,\tlog_likelihood:-950682.99\n",
      "2016-02-10 19:52:38 INFO:GibbsSupervisedLDA:[ITER] 61,\tMAE:0.08,\tlog_likelihood:-950734.54\n",
      "2016-02-10 19:52:47 INFO:GibbsSupervisedLDA:[ITER] 62,\tMAE:0.08,\tlog_likelihood:-950539.13\n",
      "2016-02-10 19:52:57 INFO:GibbsSupervisedLDA:[ITER] 63,\tMAE:0.08,\tlog_likelihood:-950733.91\n",
      "2016-02-10 19:53:07 INFO:GibbsSupervisedLDA:[ITER] 64,\tMAE:0.08,\tlog_likelihood:-949927.04\n",
      "2016-02-10 19:53:16 INFO:GibbsSupervisedLDA:[ITER] 65,\tMAE:0.08,\tlog_likelihood:-949374.16\n",
      "2016-02-10 19:53:25 INFO:GibbsSupervisedLDA:[ITER] 66,\tMAE:0.08,\tlog_likelihood:-949330.23\n",
      "2016-02-10 19:53:35 INFO:GibbsSupervisedLDA:[ITER] 67,\tMAE:0.08,\tlog_likelihood:-948267.36\n",
      "2016-02-10 19:53:44 INFO:GibbsSupervisedLDA:[ITER] 68,\tMAE:0.08,\tlog_likelihood:-949421.16\n",
      "2016-02-10 19:53:53 INFO:GibbsSupervisedLDA:[ITER] 69,\tMAE:0.08,\tlog_likelihood:-948148.44\n",
      "2016-02-10 19:54:02 INFO:GibbsSupervisedLDA:[ITER] 70,\tMAE:0.08,\tlog_likelihood:-947131.47\n",
      "2016-02-10 19:54:11 INFO:GibbsSupervisedLDA:[ITER] 71,\tMAE:0.08,\tlog_likelihood:-947165.15\n",
      "2016-02-10 19:54:19 INFO:GibbsSupervisedLDA:[ITER] 72,\tMAE:0.08,\tlog_likelihood:-947004.44\n",
      "2016-02-10 19:54:28 INFO:GibbsSupervisedLDA:[ITER] 73,\tMAE:0.08,\tlog_likelihood:-947023.80\n",
      "2016-02-10 19:54:36 INFO:GibbsSupervisedLDA:[ITER] 74,\tMAE:0.08,\tlog_likelihood:-946379.40\n",
      "2016-02-10 19:54:45 INFO:GibbsSupervisedLDA:[ITER] 75,\tMAE:0.08,\tlog_likelihood:-946648.86\n",
      "2016-02-10 19:54:53 INFO:GibbsSupervisedLDA:[ITER] 76,\tMAE:0.08,\tlog_likelihood:-947049.11\n",
      "2016-02-10 19:55:01 INFO:GibbsSupervisedLDA:[ITER] 77,\tMAE:0.08,\tlog_likelihood:-946696.94\n",
      "2016-02-10 19:55:10 INFO:GibbsSupervisedLDA:[ITER] 78,\tMAE:0.08,\tlog_likelihood:-947052.36\n",
      "2016-02-10 19:55:18 INFO:GibbsSupervisedLDA:[ITER] 79,\tMAE:0.08,\tlog_likelihood:-945975.22\n",
      "2016-02-10 19:55:27 INFO:GibbsSupervisedLDA:[ITER] 80,\tMAE:0.08,\tlog_likelihood:-945828.06\n",
      "2016-02-10 19:55:35 INFO:GibbsSupervisedLDA:[ITER] 81,\tMAE:0.08,\tlog_likelihood:-945327.94\n",
      "2016-02-10 19:55:43 INFO:GibbsSupervisedLDA:[ITER] 82,\tMAE:0.08,\tlog_likelihood:-945460.64\n",
      "2016-02-10 19:55:52 INFO:GibbsSupervisedLDA:[ITER] 83,\tMAE:0.08,\tlog_likelihood:-944772.90\n",
      "2016-02-10 19:56:00 INFO:GibbsSupervisedLDA:[ITER] 84,\tMAE:0.08,\tlog_likelihood:-944241.21\n",
      "2016-02-10 19:56:09 INFO:GibbsSupervisedLDA:[ITER] 85,\tMAE:0.08,\tlog_likelihood:-944988.86\n",
      "2016-02-10 19:56:17 INFO:GibbsSupervisedLDA:[ITER] 86,\tMAE:0.08,\tlog_likelihood:-944814.22\n",
      "2016-02-10 19:56:26 INFO:GibbsSupervisedLDA:[ITER] 87,\tMAE:0.08,\tlog_likelihood:-945325.78\n",
      "2016-02-10 19:56:34 INFO:GibbsSupervisedLDA:[ITER] 88,\tMAE:0.08,\tlog_likelihood:-945067.91\n",
      "2016-02-10 19:56:43 INFO:GibbsSupervisedLDA:[ITER] 89,\tMAE:0.08,\tlog_likelihood:-944895.82\n",
      "2016-02-10 19:56:51 INFO:GibbsSupervisedLDA:[ITER] 90,\tMAE:0.08,\tlog_likelihood:-944184.63\n",
      "2016-02-10 19:56:59 INFO:GibbsSupervisedLDA:[ITER] 91,\tMAE:0.08,\tlog_likelihood:-944688.10\n",
      "2016-02-10 19:57:08 INFO:GibbsSupervisedLDA:[ITER] 92,\tMAE:0.08,\tlog_likelihood:-944621.01\n",
      "2016-02-10 19:57:16 INFO:GibbsSupervisedLDA:[ITER] 93,\tMAE:0.08,\tlog_likelihood:-944162.71\n",
      "2016-02-10 19:57:25 INFO:GibbsSupervisedLDA:[ITER] 94,\tMAE:0.08,\tlog_likelihood:-943703.28\n",
      "2016-02-10 19:57:33 INFO:GibbsSupervisedLDA:[ITER] 95,\tMAE:0.08,\tlog_likelihood:-943792.98\n",
      "2016-02-10 19:57:42 INFO:GibbsSupervisedLDA:[ITER] 96,\tMAE:0.08,\tlog_likelihood:-944534.72\n",
      "2016-02-10 19:57:50 INFO:GibbsSupervisedLDA:[ITER] 97,\tMAE:0.08,\tlog_likelihood:-944806.54\n",
      "2016-02-10 19:57:59 INFO:GibbsSupervisedLDA:[ITER] 98,\tMAE:0.08,\tlog_likelihood:-944684.98\n",
      "2016-02-10 19:58:07 INFO:GibbsSupervisedLDA:[ITER] 99,\tMAE:0.08,\tlog_likelihood:-943800.65\n"
     ]
    }
   ],
   "source": [
    "n_topic = 50\n",
    "r_var = 0.01\n",
    "\n",
    "model = GibbsSupervisedLDA(n_doc, n_voca, n_topic, sigma=r_var)\n",
    "model.fit(corpus, ratings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Eta -0.234767820987 Topic 34 :\t bad,could,anything,get,never,movie,another,go,ca,script\n",
      "Eta -0.214572023979 Topic 41 :\t could,would,get,made,might,anything,many,good,look,never\n",
      "Eta -0.160506173159 Topic 38 :\t got,far,time,right,way,every,making,part,dull,guess\n",
      "Eta 0.0219171933193 Topic 49 :\t found,still,subject,intellectual,becomes,sense,tension,fine,talk,century\n",
      "Eta 0.0419401650241 Topic 10 :\t make,action,good,everything,acting,made,funny,dialogue,script,place\n",
      "Eta 0.0764470841209 Topic 27 :\t entertainment,value,lot,theme,violent,red,take,mostly,many,easy\n",
      "Eta 0.128975547132 Topic 21 :\t much,part,might,great,bad,getting,two,away,audience,least\n",
      "Eta 0.159533085017 Topic 36 :\t done,time,manner,dialogue,never,version,great,production,feeling,right\n",
      "Eta 0.184797336584 Topic 44 :\t opera,long,interesting,soap,go,telling,picture,chance,different,offering\n",
      "Eta 0.210652963064 Topic 31 :\t director,enough,done,many,shown,money,movie,must,give,blood\n",
      "Eta 0.232820782727 Topic 37 :\t good,look,everything,looking,also,romance,tale,new,thriller,find\n",
      "Eta 0.260212275042 Topic 11 :\t comedy,black,satire,comic,humor,love,going,cast,main,romantic\n",
      "Eta 0.339720354837 Topic 30 :\t mystery,thriller,suspense,plot,role,usual,give,horror,atmosphere,john\n",
      "Eta 0.346229992245 Topic 39 :\t role,case,watch,long,sense,back,problem,experience,well,type\n",
      "Eta 0.35836129898 Topic 28 :\t good,believe,without,go,god,church,back,interesting,still,much\n",
      "Eta 0.381319468906 Topic 23 :\t funny,also,way,think,humor,character,good,see,comedy,still\n",
      "Eta 0.384113637482 Topic 6 :\t special,make,movie,see,effects,would,first,something,though,horror\n",
      "Eta 0.396933018793 Topic 32 :\t director,scene,time,able,part,interesting,shot,interest,acting,set\n",
      "Eta 0.404554661869 Topic 5 :\t really,enough,way,could,something,much,movie,time,since,seem\n",
      "Eta 0.425037264045 Topic 33 :\t musical,music,best,french,dance,way,song,dancing,great,lively\n",
      "Eta 0.457287458236 Topic 22 :\t love,made,much,life,though,part,modern,times,acting,kind\n",
      "Eta 0.490046516159 Topic 40 :\t many,something,people,us,good,could,times,might,instead,small\n",
      "Eta 0.535690662439 Topic 14 :\t make,movie,great,would,work,comes,though,really,mark,audience\n",
      "Eta 0.544370355785 Topic 7 :\t get,really,anything,seeing,life,feel,difficult,see,enough,emotional\n",
      "Eta 0.545175280796 Topic 8 :\t killer,two,movie,together,couple,script,serial,though,best,ca\n",
      "Eta 0.565327709237 Topic 45 :\t almost,joan,making,sense,two,easily,care,lot,someone,person\n",
      "Eta 0.567068916427 Topic 12 :\t would,made,still,main,audience,goes,music,man,subject,felt\n",
      "Eta 0.580004732236 Topic 24 :\t could,showing,see,house,come,life,say,psychological,going,city\n",
      "Eta 0.588440544275 Topic 25 :\t noir,character,dark,gave,role,hero,also,study,genre,protagonist\n",
      "Eta 0.589570607942 Topic 17 :\t much,also,best,camera,thought,together,two,work,added,every\n",
      "Eta 0.594475657781 Topic 16 :\t western,artist,good,man,scene,action,art,pleasing,plenty,still\n",
      "Eta 0.598417849919 Topic 26 :\t war,men,made,sides,place,show,battle,submarine,military,performance\n",
      "Eta 0.607731679912 Topic 35 :\t message,world,audience,political,people,point,showing,time,public,country\n",
      "Eta 0.635974893904 Topic 15 :\t sex,culture,first,seem,comes,sexual,play,performance,direction,sense\n",
      "Eta 0.64483388889 Topic 3 :\t noir,shot,dark,made,ending,feel,style,city,mood,little\n",
      "Eta 0.646089932446 Topic 19 :\t love,tale,director,hollywood,get,much,make,could,never,away\n",
      "Eta 0.65050684023 Topic 48 :\t way,make,see,though,life,know,family,might,someone,real\n",
      "Eta 0.66143777888 Topic 1 :\t hollywood,action,role,say,star,get,office,certain,original,come\n",
      "Eta 0.686787742783 Topic 46 :\t sense,think,thing,would,much,something,new,better,saying,feeling\n",
      "Eta 0.709956673591 Topic 9 :\t director,political,never,right,making,bad,without,taken,seen,plot\n",
      "Eta 0.712450862064 Topic 29 :\t something,violence,way,say,many,get,society,think,everyone,point\n",
      "Eta 0.735116092694 Topic 13 :\t character,performance,real,someone,also,without,director,work,well,interesting\n",
      "Eta 0.779470138429 Topic 42 :\t much,work,though,works,yet,well,rather,would,odd,make\n",
      "Eta 0.80049939286 Topic 18 :\t many,better,movie,look,way,last,white,directed,book,mood\n",
      "Eta 0.891479893452 Topic 47 :\t way,also,two,making,good,fun,much,scene,seem,romantic\n",
      "Eta 0.900176187326 Topic 0 :\t could,made,used,seen,violence,man,different,cast,scene,john\n",
      "Eta 1.0136094881 Topic 43 :\t people,us,human,see,history,part,right,point,message,place\n",
      "Eta 1.05242575044 Topic 2 :\t american,might,also,director,work,take,much,always,society,america\n",
      "Eta 1.50749544462 Topic 20 :\t screen,might,see,masterpiece,way,work,version,must,better,well\n",
      "Eta 1.8658611489 Topic 4 :\t many,great,life,performance,best,look,something,way,see,theme\n"
     ]
    }
   ],
   "source": [
    "for ti in model.eta.argsort():\n",
    "    top_words = get_top_words(model.TW, voca, ti, n_words=10)\n",
    "    print('Eta', model.eta[ti] ,'Topic', ti ,':\\t', ','.join(top_words))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**The review about one movie, so the topics does not seem to be clearly distinguishable. At least, however, the most negative topics contain words such as `bad`, `never`, and `dull`. And the most positive topics contain word like `great`, `best`, and `masterpeice`.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  },
  "toc": {
   "toc_cell": true,
   "toc_number_sections": true,
   "toc_threshold": 4,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
